In [151]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
In [152]:
df = pd.read_csv('log.csv', parse_dates=['Joined', 'Left', 'Time'])
df['Time'] = df['Left'] - df['Joined']
df = df[df['Member Name'] != 'Sauron']
df
Out[152]:
Member Id Member Name Joined Left Time
0 210743674525450240 stratos1805 2021-04-27 02:21:53.125370 2021-04-27 02:33:41.455374 0 days 00:11:48.330004
1 132415133711466496 סדאם חוסיין ז"ל 2021-04-27 02:21:53.125370 2021-04-27 02:35:55.345365 0 days 00:14:02.219995
2 192642893159202816 MrStormagedon 2021-04-27 02:21:53.125370 2021-04-27 03:10:15.058915 0 days 00:48:21.933545
3 193006567455457280 Cardi Biton 2021-04-27 12:44:58.546623 2021-04-27 12:56:30.695541 0 days 00:11:32.148918
4 192986627998613504 rone 2021-04-27 13:55:12.166887 2021-04-27 14:14:01.761058 0 days 00:18:49.594171
... ... ... ... ... ...
356 208115226489389058 XPEZNAZ 2021-05-16 23:20:40.980030 2021-05-17 01:56:10.362019 0 days 02:35:29.381989
357 195596782267269120 Senlers 2021-05-16 23:50:56.933888 2021-05-17 01:56:23.484109 0 days 02:05:26.550221
358 193006567455457280 Cardi Biton 2021-05-17 01:49:50.946648 2021-05-17 02:22:44.480374 0 days 00:32:53.533726
359 439772519378190337 Kosta 2021-05-16 23:50:56.933888 2021-05-17 02:48:00.909960 0 days 02:57:03.976072
360 287543149633601536 Rey 2021-05-17 00:00:23.939672 2021-05-17 02:48:02.919619 0 days 02:47:38.979947

359 rows × 5 columns

In [153]:
min_datetime = df['Joined'].min()
max_datetime = df['Left'].max()
min_date, max_date = min_datetime.date(), max_datetime.date()
df = df.groupby('Member Name').filter(lambda x: x['Time'].sum().seconds / 3600 > 5)
unique_members = df['Member Name'].unique().tolist()

colormap = plt.cm.tab20(np.linspace(0, 1, len(unique_members)))
palette = [matplotlib.colors.to_hex(c) for c in colormap]

print(f'Data collected over {max_datetime - min_datetime} with {len(unique_members)} unique members')
Data collected over 20 days 00:26:09.794249 with 13 unique members
In [154]:
# sessions.index = pd.CategoricalIndex(sessions.index, unique_members)
# sessions = sessions.sort_values(ascending=False)
# sessions
# sessions.plot(use_index=True, kind='box', figsize=(20, 5), title='Average Session Length', xlabel='', ylabel='Hours', rot=0, color=colormap)
# plt.show()
plt.figure(figsize=(20, 5))
sns.boxplot(x='Member Name', y=df['Time'].dt.seconds / 3600, data=df, palette=palette)
plt.xlabel('')
plt.ylabel('Hours')
plt.show()
2021-05-17T03:09:22.654145 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [155]:
start = min_datetime.replace(minute=0, second=0, microsecond=0)
end = max_datetime.replace(minute=0, second=0, microsecond=0) + pd.offsets.Hour(1)
attendance_data = []
date = start
while date < end:
    members = df[(df['Joined'] <= date + pd.offsets.Minute(1)) & (df['Left'] >= date)]['Member Name'].to_list()
    attendance_data.append([date] + [member in members for member in unique_members])
    date += pd.offsets.Minute(1)
att_df = pd.DataFrame(attendance_data, columns=['Date'] + unique_members)
In [156]:
axes = att_df.set_index('Date').rolling(1).mean().plot(figsize=(14, 24), yticks=[0, 1], subplots=True, sharex=True, kind='area', color=colormap)
for ax in axes:
    ax.set_yticklabels(['Disconnected', 'Connected'])
plt.subplots_adjust()
plt.show()
2021-05-17T03:09:43.233004 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [157]:
def part_of_day(date):
    if 0 <= date.hour < 6:
        return '00:00 - 05:59'
    elif 6 <= date.hour < 12:
        return '06:00 - 11:59'
    elif 12 <= date.hour < 18:
        return '12:00 - 17:59'
    else:
        return '18:00 - 23:59'

weekday_order = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']

daily_att = att_df.groupby(att_df['Date'].dt.day_name()).sum() / 60
daily_att.index = pd.CategoricalIndex(daily_att.index, weekday_order)
daily_att = daily_att.sort_index()
hourly_att = att_df.groupby(att_df['Date'].apply(part_of_day)).sum() / 60
all_att = att_df.sum() / 60

daily_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Daily Attendance', xlabel='Weekday', rot=0, color=colormap)
plt.show()
hourly_att.plot(use_index=True, kind='bar', figsize=(20, 5), title='Hourly Attendance', xlabel='Time', rot=0, color=colormap)
plt.show()
all_att.sort_values(ascending=False).plot(use_index=True, kind='bar', figsize=(20, 5), title='Total Attendance', xlabel=None, ylabel='Hours', rot=0, color=colormap)
plt.show()
2021-05-17T03:09:47.500699 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
2021-05-17T03:09:48.144489 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
2021-05-17T03:09:48.491071 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [158]:
corr = att_df.corr()
corr
Out[158]:
stratos1805 סדאם חוסיין ז"ל MrStormagedon Cardi Biton rone יניבצ'וק October Detka XPEZNAZ NightSeeker Groovy xWarLord_Sharon OMGitsyuvalT
stratos1805 1.000000 0.592933 0.342003 0.337506 0.131281 0.292580 0.387685 0.101028 0.347009 0.338922 0.175046 0.001247 0.140812
סדאם חוסיין ז"ל 0.592933 1.000000 0.446480 0.382721 0.186916 0.316729 0.388446 0.194582 0.489101 0.316102 0.267469 0.035960 0.176787
MrStormagedon 0.342003 0.446480 1.000000 0.256128 0.105088 0.277449 0.357718 0.309785 0.252681 0.228611 0.194031 -0.014640 0.042537
Cardi Biton 0.337506 0.382721 0.256128 1.000000 0.152628 0.287613 0.352454 0.040106 0.259832 0.185620 0.049448 -0.005874 0.154327
rone 0.131281 0.186916 0.105088 0.152628 1.000000 0.300955 0.201838 -0.016336 0.253505 0.077589 0.165439 0.382026 0.094050
יניבצ'וק 0.292580 0.316729 0.277449 0.287613 0.300955 1.000000 0.660227 -0.033998 0.194661 0.180584 0.099810 0.095324 0.160021
October 0.387685 0.388446 0.357718 0.352454 0.201838 0.660227 1.000000 -0.017479 0.243361 0.141978 0.176216 0.018678 0.021348
Detka 0.101028 0.194582 0.309785 0.040106 -0.016336 -0.033998 -0.017479 1.000000 0.098388 0.218674 0.043587 -0.021572 -0.024728
XPEZNAZ 0.347009 0.489101 0.252681 0.259832 0.253505 0.194661 0.243361 0.098388 1.000000 0.363028 0.105033 -0.034092 0.137446
NightSeeker 0.338922 0.316102 0.228611 0.185620 0.077589 0.180584 0.141978 0.218674 0.363028 1.000000 0.090071 -0.022949 0.036726
Groovy 0.175046 0.267469 0.194031 0.049448 0.165439 0.099810 0.176216 0.043587 0.105033 0.090071 1.000000 -0.016040 0.057922
xWarLord_Sharon 0.001247 0.035960 -0.014640 -0.005874 0.382026 0.095324 0.018678 -0.021572 -0.034092 -0.022949 -0.016040 1.000000 -0.012447
OMGitsyuvalT 0.140812 0.176787 0.042537 0.154327 0.094050 0.160021 0.021348 -0.024728 0.137446 0.036726 0.057922 -0.012447 1.000000
In [159]:
plt.figure(figsize=(20, 20))
sns.heatmap(corr, cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
2021-05-17T03:09:48.862078 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [160]:
plt.figure(figsize=(20, 20))
sns.heatmap(corr[(abs(corr) > 0.3)], cmap='YlGnBu')
plt.xticks(rotation=0)
plt.show()
2021-05-17T03:09:49.468498 image/svg+xml Matplotlib v3.4.1, https://matplotlib.org/
In [161]:
pd.DataFrame(corr.unstack().sort_values().drop_duplicates())
Out[161]:
0
XPEZNAZ xWarLord_Sharon -0.034092
Detka יניבצ'וק -0.033998
OMGitsyuvalT -0.024728
xWarLord_Sharon NightSeeker -0.022949
Detka -0.021572
... ... ...
MrStormagedon סדאם חוסיין ז"ל 0.446480
סדאם חוסיין ז"ל XPEZNAZ 0.489101
stratos1805 0.592933
October יניבצ'וק 0.660227
stratos1805 stratos1805 1.000000

79 rows × 1 columns